{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下面我们来准备一些数据定义"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.matrix([[0.5],[1.0]])\n",
    "t = np.matrix([[1.0],[0.0]])\n",
    "w1 = np.matrix([[0.1, 0.2],[0.3, 0.4]]).T\n",
    "w2 = np.matrix([[0.6, 0.7], [0.8, 0.9]]).T\n",
    "b1 = np.matrix([[0.5], [0.5]])\n",
    "b2 = np.matrix([[1.0], [1.0]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "进行一次前向计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.85]\n",
      " [ 1.  ]]\n"
     ]
    }
   ],
   "source": [
    "# Layer-1 pre-activation: weight matrix times input, plus bias.\n",
    "logit1 = np.dot(w1, x) + b1\n",
    "print(logit1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.70056714]\n",
      " [ 0.73105858]]\n"
     ]
    }
   ],
   "source": [
    "# Element-wise logistic sigmoid of the layer-1 pre-activation.\n",
    "h1 = 1.0 / (np.exp(-logit1) + 1.0)\n",
    "print(h1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 2.00518715]\n",
      " [ 2.14834972]]\n"
     ]
    }
   ],
   "source": [
    "# Layer-2 pre-activation, fed by the hidden activation h1.\n",
    "logit2 = np.dot(w2, h1) + b2\n",
    "print(logit2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.88134062]\n",
      " [ 0.89551446]]\n"
     ]
    }
   ],
   "source": [
    "# Element-wise logistic sigmoid of the layer-2 pre-activation.\n",
    "h2 = 1.0 / (np.exp(-logit2) + 1.0)\n",
    "print(h2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = h2  # the network output is the layer-2 activation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.408013101595\n"
     ]
    }
   ],
   "source": [
    "# Half the sum of squared differences between target t and output y.\n",
    "cost = np.power(t - y, 2).sum() / 2\n",
    "print(cost)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "forward>>>>>>>>\n",
      "x: [[ 0.5]\n",
      " [ 1. ]]\n",
      "w1: [[ 0.1  0.3]\n",
      " [ 0.2  0.4]]\n",
      "b1: [[ 0.5]\n",
      " [ 0.5]]\n",
      "w2: [[ 0.6  0.8]\n",
      " [ 0.7  0.9]]\n",
      "b2: [[ 1.]\n",
      " [ 1.]]\n",
      "logit1: [[ 0.85]\n",
      " [ 1.  ]]\n",
      "h1: [[ 0.70056714]\n",
      " [ 0.73105858]]\n",
      "logit2: [[ 2.00518715]\n",
      " [ 2.14834972]]\n",
      "h2: [[ 0.88134062]\n",
      " [ 0.89551446]]\n",
      "cost:\n",
      "0.408013101595\n",
      ">>>>>>>>>>end forward\n"
     ]
    }
   ],
   "source": [
    "print('forward>>>>>>>>')\n",
    "# Echo the input, the parameters and every intermediate value in order.\n",
    "for label, value in (\n",
    "    ('x:', x),\n",
    "    ('w1:', w1),\n",
    "    ('b1:', b1),\n",
    "    ('w2:', w2),\n",
    "    ('b2:', b2),\n",
    "    ('logit1:', logit1),\n",
    "    ('h1:', h1),\n",
    "    ('logit2:', logit2),\n",
    "    ('h2:', h2),\n",
    "):\n",
    "    print(label, value)\n",
    "# The cost label and its value are printed on separate lines.\n",
    "print('cost:')\n",
    "print(cost)\n",
    "print('>>>>>>>>>>end forward')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "接下来我们计算一次反向传播"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<<<<<<<<<<backward\n",
      "delta_2 [[-0.01240932]\n",
      " [ 0.08379177]]\n",
      "delta_1 [[ 0.01074218]\n",
      " [ 0.01287516]]\n",
      "nabla_w1 [[ 0.00537109  0.01074218]\n",
      " [ 0.00643758  0.01287516]]\n",
      "nabla_b1 [[ 0.01074218]\n",
      " [ 0.01287516]]\n",
      "nabla_w2 [[-0.00869356 -0.00907194]\n",
      " [ 0.05870176  0.0612567 ]]\n",
      "nabla_b2 [[-0.01240932]\n",
      " [ 0.08379177]]\n",
      "------------end backward\n"
     ]
    }
   ],
   "source": [
    "print('<<<<<<<<<<backward')\n",
    "\n",
    "# Output layer: delta = (prediction - target) * sigmoid derivative y*(1-y).\n",
    "sp_2 = np.multiply(y, 1 - y)\n",
    "delta_2 = np.multiply(y - t, sp_2)\n",
    "nabla_w2 = np.dot(delta_2, h1.T)  # gradient for the layer-2 weights\n",
    "nabla_b2 = delta_2  # gradient for the layer-2 bias\n",
    "\n",
    "# Hidden layer: push delta_2 back through w2, then scale by the sigmoid\n",
    "# derivative h1*(1-h1), which is the sp_1 value shown in the reference output.\n",
    "sp_1 = np.multiply(h1, 1 - h1)\n",
    "delta_1 = np.multiply(np.dot(w2.T, delta_2), sp_1)\n",
    "nabla_w1 = np.dot(delta_1, x.T)  # gradient for the layer-1 weights\n",
    "nabla_b1 = delta_1  # gradient for the layer-1 bias\n",
    "\n",
    "# Update step: subtract each gradient from its parameter (no extra scaling).\n",
    "new_w2, new_b2 = w2 - nabla_w2, b2 - nabla_b2\n",
    "new_w1, new_b1 = w1 - nabla_w1, b1 - nabla_b1\n",
    "\n",
    "# Report the deltas and gradients in a fixed order.\n",
    "for label, value in (\n",
    "    ('delta_2', delta_2),\n",
    "    ('delta_1', delta_1),\n",
    "    ('nabla_w1', nabla_w1),\n",
    "    ('nabla_b1', nabla_b1),\n",
    "    ('nabla_w2', nabla_w2),\n",
    "    ('nabla_b2', nabla_b2),\n",
    "):\n",
    "    print(label, value)\n",
    "print('------------end backward')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "如果一切顺利，得到的结果应该类似下面这样（下面的参考输出是在 Python 2 下运行得到的，因此每一行以元组形式显示；在 Python 3 下数值相同，只是打印格式不同。其中 sp_1 一行对应上面代码中被注释掉的打印语句）："
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```\n",
    "forward>>>>>>>>\n",
    "('x:', matrix([[ 0.5],\n",
    "        [ 1. ]]))\n",
    "('w1:', matrix([[ 0.1,  0.3],\n",
    "        [ 0.2,  0.4]]))\n",
    "('b1:', matrix([[ 0.5],\n",
    "        [ 0.5]]))\n",
    "('w2:', matrix([[ 0.6,  0.8],\n",
    "        [ 0.7,  0.9]]))\n",
    "('b2:', matrix([[ 1.],\n",
    "        [ 1.]]))\n",
    "('logit1:', matrix([[ 0.85],\n",
    "        [ 1.  ]]))\n",
    "('h1:', matrix([[ 0.70056714],\n",
    "        [ 0.73105858]]))\n",
    "('logit2:', matrix([[ 2.00518715],\n",
    "        [ 2.14834972]]))\n",
    "('h2:', matrix([[ 0.88134062],\n",
    "        [ 0.89551446]]))\n",
    "cost:\n",
    "0.408013101595\n",
    ">>>>>>>>>>end forward\n",
    "<<<<<<<<<<backward\n",
    "('delta_2', matrix([[-0.01240932],\n",
    "        [ 0.08379177]]))\n",
    "('sp_1:', matrix([[ 0.20977282],\n",
    "        [ 0.19661193]]))\n",
    "('delta_1', matrix([[ 0.01074218],\n",
    "        [ 0.01287516]]))\n",
    "('nabla_w1', matrix([[ 0.00537109,  0.01074218],\n",
    "        [ 0.00643758,  0.01287516]]))\n",
    "('nabla_b1', matrix([[ 0.01074218],\n",
    "        [ 0.01287516]]))\n",
    "('nabla_w2', matrix([[-0.00869356, -0.00907194],\n",
    "        [ 0.05870176,  0.0612567 ]]))\n",
    "('nabla_b2', matrix([[-0.01240932],\n",
    "        [ 0.08379177]]))\n",
    "------------end backward\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
